Exploratory Analysis

In this section, I will explore the foot traffic data and the spend patterns data, and visualize them to gain more insight into how they have changed over time.

Important

Note: The data used in this analysis covers only July of each year. For the sake of simplicity, I will refer to each month of data by its year (for example, "2019" means July 2019).

Code
import pandas as pd
import geopandas as gpd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.ticker import FuncFormatter
import seaborn as sns
import altair as alt
import holoviews as hv
import hvplot.pandas
# Show floats with two decimals whenever a DataFrame is displayed
pd.options.display.float_format = "{:.2f}".format

# Select Bokeh as the HoloViews rendering backend for the hvplot maps
hv.extension('bokeh')
Code
# Read the place-level records and the city limits layer from GeoJSON
advan_sg = gpd.read_file('../data/advan_sg.geojson')
city = gpd.read_file('../data/City_Limits.geojson')

# Base layer drawn underneath every point map below:
# a white, semi-transparent fill with a thin black outline and no hover tooltip
city_outline_opts = dict(
    geo=True,
    crs=4326,
    color='white',
    alpha=0.5,
    line_color='black',
    line_width=1,
    hover=False,
)
city_plot = city.hvplot(**city_outline_opts)

First, let’s load the data and have an overview of the total foot traffic and total spend over time in the table below.

Code
# Yearly totals of the four foot-traffic / spend measures.
# The metric columns are selected and renamed by name (rather than by position)
# so a label can never end up on the wrong column if the column order of the
# source file changes.
metric_labels = {
    'RAW_VISIT_COUNTS': 'Total Visits',
    'RAW_VISITOR_COUNTS': 'Total Visitors',
    'RAW_TOTAL_SPEND': 'Total Spend',
    'RAW_NUM_TRANSACTIONS': 'Total Transactions',
}

summary_table = (
    advan_sg
    .groupby('DATE_RANGE_START')[list(metric_labels)]
    .sum()
    .reset_index()
    .rename(columns={'DATE_RANGE_START': 'Year', **metric_labels})
)

# Display the table as the cell output
summary_table
Year Total Visits Total Visitors Total Spend Total Transactions
0 2019 5591954.00 3299734.00 18602269.20 730129.00
1 2020 1484566.00 921549.00 13145216.02 407680.00
2 2021 2524335.00 1589399.00 16743947.70 544171.00
3 2022 3953924.00 2468341.00 21524058.02 683882.00
4 2023 1919268.00 1275651.00 19350828.55 650189.00
5 2024 1548388.00 1033117.00 18474501.11 610346.00

Total Number of Visits

The chart shows the total number of visits over time. The number of visits first decreased sharply in 2020 due to the COVID-19 pandemic, then started to recover in 2021 and reached a post-pandemic peak in 2022, although it remained below the 2019 level. However, the number of visits started to decrease again in 2023. In 2024, the number of visits is almost the same as in 2020.

Code
# Line chart of the yearly 'Total Visits' column, styled for a dark background.

# Text styling for the axis labels and the title (the title is larger and bold)
axis_fontdict = dict(fontsize=12, family='Cambria', color='white')
title_fontdict = dict(axis_fontdict, fontsize=14, fontweight='bold')

fig, ax = plt.subplots(figsize=(6, 4))

# Same dark navy background for the figure and the plotting area
for surface in (fig, ax):
    surface.set_facecolor('#091833')

# Draw the series as a magenta line with a circular marker per year
summary_table.plot(x='Year', y='Total Visits', marker='o', color='#ea00d9', legend=False, ax=ax)

# Title and axis labels (set after plotting so they replace the pandas defaults)
ax.set_title('Total Number of Visits, Philadelphia', fontdict=title_fontdict, pad=20)
ax.set_xlabel('Year', fontdict=axis_fontdict)
ax.set_ylabel('Total Visits', fontdict=axis_fontdict)

# White labels and ticks so they remain readable on the dark background
for axis_obj in (ax.xaxis, ax.yaxis):
    axis_obj.label.set_color('white')
for axis_name in ('x', 'y'):
    ax.tick_params(axis=axis_name, colors='white')

# Keep only the bottom and left spines, drawn in white
for side in ('bottom', 'left'):
    ax.spines[side].set_color('white')
for side in ('top', 'right'):
    ax.spines[side].set_visible(False)

# One x tick per year (2019-2024); y ticks every 1.5M from 0 to 6M
ax.set_xticks(range(2019, 2025))
ax.set_yticks(range(0, 7500000, 1500000))

# Show y tick labels as comma-separated integers instead of scientific notation
ax.yaxis.get_major_formatter().set_scientific(False)
ax.yaxis.set_major_formatter(FuncFormatter(lambda value, _pos: f"{value:,.0f}"))

plt.show()

Here I map the total number of visits in Philadelphia from 2019 to 2024 using hvplot, and you can drag the slider to see the changes over time. It can be seen that the most visited places are in the Center City area, and that the number of yellow dots (representing a higher number of visits) has dropped since 2019, which is consistent with the conclusion from the line chart.

Code
# Sort by visit counts so that the largest points are plotted on top
visits = advan_sg.copy()
visits = visits.sort_values(by='RAW_VISIT_COUNTS')

# Point map colored by visit count; grouping by DATE_RANGE_START yields one
# view per period, and clim pins the color scale to the 0-5000 range
visit_map_opts = dict(
    geo=True,
    crs=4326,
    c='RAW_VISIT_COUNTS',
    cmap='viridis',
    clim=(0, 5000),
    hover_cols=['LOCATION_NAME', 'RAW_VISIT_COUNTS'],
    groupby='DATE_RANGE_START',
    dynamic=False,
    width=800,
    height=600,
    title="Number of Visits by Place",
    xlabel="Longitude",
    ylabel="Latitude",
    line_color='white',
    line_width=0.3,
)
visit_plot = visits.hvplot(**visit_map_opts)

# Overlay the points on the city outline and display the result
city_plot * visit_plot

Total Number of Visitors

The chart and the map show the total number of visitors over time. Unsurprisingly, the trend and spatial pattern are very similar to those of the total number of visits.

Code
# Line chart of the yearly 'Total Visitors' column, styled for a dark background.

# Text styling for the axis labels and the title (the title is larger and bold)
axis_fontdict = dict(fontsize=12, family='Cambria', color='white')
title_fontdict = dict(axis_fontdict, fontsize=14, fontweight='bold')

fig, ax = plt.subplots(figsize=(6, 4))

# Same dark navy background for the figure and the plotting area
for surface in (fig, ax):
    surface.set_facecolor('#091833')

# Draw the series as a magenta line with a circular marker per year
summary_table.plot(x='Year', y='Total Visitors', marker='o', color='#ea00d9', legend=False, ax=ax)

# Title and axis labels (set after plotting so they replace the pandas defaults)
ax.set_title('Total Number of Visitors, Philadelphia', fontdict=title_fontdict, pad=20)
ax.set_xlabel('Year', fontdict=axis_fontdict)
ax.set_ylabel('Total Visitors', fontdict=axis_fontdict)

# White labels and ticks so they remain readable on the dark background
for axis_obj in (ax.xaxis, ax.yaxis):
    axis_obj.label.set_color('white')
for axis_name in ('x', 'y'):
    ax.tick_params(axis=axis_name, colors='white')

# Keep only the bottom and left spines, drawn in white
for side in ('bottom', 'left'):
    ax.spines[side].set_color('white')
for side in ('top', 'right'):
    ax.spines[side].set_visible(False)

# One x tick per year (2019-2024); y ticks every 1M from 0 to 4M
ax.set_xticks(range(2019, 2025))
ax.set_yticks(range(0, 5000000, 1000000))

# Show y tick labels as comma-separated integers instead of scientific notation
ax.yaxis.get_major_formatter().set_scientific(False)
ax.yaxis.set_major_formatter(FuncFormatter(lambda value, _pos: f"{value:,.0f}"))

plt.show()

Code
# Point map of visitor counts per place, overlaid on the city outline.

# Sort by visitor counts so that the largest points are plotted on top.
# sort_values already returns a new frame, so no explicit copy is needed.
visitors = advan_sg.sort_values(by='RAW_VISITOR_COUNTS')

# Plot from `visitors` (the frame sorted just above) so the draw order matches
# the column used for coloring and this cell does not depend on the `visits`
# frame created for the visits map.
visitor_plot = visitors.hvplot(
    geo=True,
    c='RAW_VISITOR_COUNTS',
    cmap='viridis',
    hover_cols=['LOCATION_NAME', 'RAW_VISITOR_COUNTS'],
    groupby='DATE_RANGE_START',  # one view per period
    dynamic=False,
    width=800,
    height=600,
    crs=4326,
    title="Number of Visitors by Place",
    xlabel="Longitude",
    ylabel="Latitude",
    line_color='white',
    line_width=0.3,
    clim=(0, 5000),  # pin the color scale to the 0-5000 range
)

# Overlay the points on the city outline and display the result
city_plot * visitor_plot

Total Income (Spend)

The spend data is aggregated by place, so it can also be seen as the income of each store.

The chart shows the total income over time. The trend is somewhat different from that of the total number of visits. The total income first decreased sharply in 2020, then started to recover in 2021 and reached a peak in 2022. It started to decrease again in 2023, but seems to have stayed steady at roughly the pre-pandemic level. Recall that, in the background section, some reports mentioned signs of a turnaround in 2023. Our data also seems to support this conclusion. But is this really the case? I will further investigate this in the next section.

Code
# Line chart of the yearly 'Total Spend' column (labelled as income), styled for a dark background.

# Text styling for the axis labels and the title (the title is larger and bold)
axis_fontdict = dict(fontsize=12, family='Cambria', color='white')
title_fontdict = dict(axis_fontdict, fontsize=14, fontweight='bold')

fig, ax = plt.subplots(figsize=(6, 4))

# Same dark navy background for the figure and the plotting area
for surface in (fig, ax):
    surface.set_facecolor('#091833')

# Draw the series as a magenta line with a circular marker per year
summary_table.plot(x='Year', y='Total Spend', marker='o', color='#ea00d9', legend=False, ax=ax)

# Title and axis labels (set after plotting so they replace the pandas defaults)
ax.set_title('Total Income, Philadelphia', fontdict=title_fontdict, pad=20)
ax.set_xlabel('Year', fontdict=axis_fontdict)
ax.set_ylabel('Total Income', fontdict=axis_fontdict)

# White labels and ticks so they remain readable on the dark background
for axis_obj in (ax.xaxis, ax.yaxis):
    axis_obj.label.set_color('white')
for axis_name in ('x', 'y'):
    ax.tick_params(axis=axis_name, colors='white')

# Keep only the bottom and left spines, drawn in white
for side in ('bottom', 'left'):
    ax.spines[side].set_color('white')
for side in ('top', 'right'):
    ax.spines[side].set_visible(False)

# One x tick per year (2019-2024); y ticks every 5M from 0 to 25M
ax.set_xticks(range(2019, 2025))
ax.set_yticks(range(0, 30000000, 5000000))

# Show y tick labels as comma-separated integers instead of scientific notation
ax.yaxis.get_major_formatter().set_scientific(False)
ax.yaxis.set_major_formatter(FuncFormatter(lambda value, _pos: f"{value:,.0f}"))

plt.show()

In terms of the spatial pattern, the map shows that the total income is higher in the Center City area, which is consistent with the total number of visits. However, it can also be seen that the total income is high in the northwestern and northeastern parts of the city, which may indicate that the average spend or the average number of transactions per visit is higher in these areas.

Code
# Point map of total spend (income) per place, overlaid on the city outline.

# Sort by total spend so that the largest points are plotted on top.
# sort_values already returns a new frame, so no explicit copy is needed.
income = advan_sg.sort_values(by='RAW_TOTAL_SPEND')

# Plot from `income` (the frame sorted just above) so the draw order matches
# the column used for coloring and this cell does not depend on the `visits`
# frame created for the visits map.
income_plot = income.hvplot(
    geo=True,
    c='RAW_TOTAL_SPEND',
    cmap='viridis',
    hover_cols=['LOCATION_NAME', 'RAW_TOTAL_SPEND'],
    groupby='DATE_RANGE_START',  # one view per period
    dynamic=False,
    width=800,
    height=600,
    crs=4326,
    title="Total Income by Place",
    xlabel="Longitude",
    ylabel="Latitude",
    line_color='white',
    line_width=0.3,
    clim=(0, 20000),  # pin the color scale to the 0-20000 range
)

# Overlay the points on the city outline and display the result
city_plot * income_plot

Total Number of Transactions

The chart shows the total number of transactions over time. Unsurprisingly, the trend and spatial pattern are very similar to those of the total spend.

Code
# Line chart of the yearly 'Total Transactions' column, styled for a dark background.

# Text styling for the axis labels and the title (the title is larger and bold)
axis_fontdict = dict(fontsize=12, family='Cambria', color='white')
title_fontdict = dict(axis_fontdict, fontsize=14, fontweight='bold')

fig, ax = plt.subplots(figsize=(6, 4))

# Same dark navy background for the figure and the plotting area
for surface in (fig, ax):
    surface.set_facecolor('#091833')

# Draw the series as a magenta line with a circular marker per year
summary_table.plot(x='Year', y='Total Transactions', marker='o', color='#ea00d9', legend=False, ax=ax)

# Title and axis labels (set after plotting so they replace the pandas defaults)
ax.set_title('Total Number of Transactions, Philadelphia', fontdict=title_fontdict, pad=20)
ax.set_xlabel('Year', fontdict=axis_fontdict)
ax.set_ylabel('Total Transactions', fontdict=axis_fontdict)

# White labels and ticks so they remain readable on the dark background
for axis_obj in (ax.xaxis, ax.yaxis):
    axis_obj.label.set_color('white')
for axis_name in ('x', 'y'):
    ax.tick_params(axis=axis_name, colors='white')

# Keep only the bottom and left spines, drawn in white
for side in ('bottom', 'left'):
    ax.spines[side].set_color('white')
for side in ('top', 'right'):
    ax.spines[side].set_visible(False)

# One x tick per year (2019-2024); y ticks every 200k from 0 to 800k
ax.set_xticks(range(2019, 2025))
ax.set_yticks(range(0, 1000000, 200000))

# Show y tick labels as comma-separated integers instead of scientific notation
ax.yaxis.get_major_formatter().set_scientific(False)
ax.yaxis.set_major_formatter(FuncFormatter(lambda value, _pos: f"{value:,.0f}"))

plt.show()

Code
# Point map of transaction counts per place, overlaid on the city outline.

# Sort by transaction counts so that the largest points are plotted on top.
# sort_values already returns a new frame, so no explicit copy is needed.
transaction = advan_sg.sort_values(by='RAW_NUM_TRANSACTIONS')

# Plot from `transaction` (the frame sorted just above) so the draw order
# matches the column used for coloring and this cell does not depend on the
# `visits` frame created for the visits map.
transaction_plot = transaction.hvplot(
    geo=True,
    c='RAW_NUM_TRANSACTIONS',
    cmap='viridis',
    hover_cols=['LOCATION_NAME', 'RAW_NUM_TRANSACTIONS'],
    groupby='DATE_RANGE_START',  # one view per period
    dynamic=False,
    width=800,
    height=600,
    crs=4326,
    title="Number of Transactions by Place",
    xlabel="Longitude",
    ylabel="Latitude",
    line_color='white',
    line_width=0.3,
    clim=(0, 1000),  # pin the color scale to the 0-1000 range
)

# Overlay the points on the city outline and display the result
city_plot * transaction_plot